### TEST: glimmav1 dataset (MArrayLM) ###
library(Glimma)
library(limma)
library(GlimmaV2)
data(lymphomaRNAseq)
rnaseq <- lymphomaRNAseq
# add lane
groups <- data.frame(genotype=rnaseq$samples$group,
lane= as.character(c(rep(4,5),3,3)),
miscCont=c(rep(4000,5),300,250),
miscDisc=c("blue","red",rep("green",5)))
# add libsize
groups <- rnaseq$samples$group
# fit
design <- model.matrix(~0+groups)
contrasts <- cbind(Smchd1null.vs.WT=c(-1,1))
# convert raw counts to logCPM values by automatically extracting libsizes and normalisation factors from x
vm <- voomWithQualityWeights(rnaseq, design=design)
fit <- lmFit(vm, design=design)
fit <- contrasts.fit(fit, contrasts)
fit <- eBayes(fit)
dtFit <- decideTests(fit)
class(fit)
## [1] "MArrayLM"
## attr(,"package")
## [1] "limma"
# general plot
library("tidyverse")
## -- Attaching packages ----------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.2 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts -------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
formatCounts <- function(counts, anno)
{
anno <- cbind(anno, sample=colnames(counts))
counts %>%
as_tibble() %>%
mutate(gene = rownames(counts)) %>%
pivot_longer(colnames(counts), names_to = "sample", values_to = "count") %>%
left_join(anno)
split_counts <- counts %>%
as_tibble() %>%
mutate(gene = rownames(counts)) %>%
pivot_longer(colnames(counts), names_to = "sample", values_to = "count") %>%
left_join(anno) %>%
group_split(gene, .keep = FALSE)
return(split_counts)
}
x <- formatCounts(rnaseq$counts, as.data.frame(rnaseq$samples$group))
## Joining, by = "sample"
## Joining, by = "sample"
glimmaMA(fit, counts=rnaseq$counts, groups=rnaseq$samples$group)
# verify against limma
plotMD(fit, status=dtFit)

### TEST: rnaseq-123 dataset (MArrayLM) ###
library(edgeR, quietly=TRUE)
library(Mus.musculus, quietly=TRUE)
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## The following object is masked from 'package:grDevices':
##
## windows
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
##
library(GlimmaV2, quietly=TRUE)
# load data
files <- c("GSM1545535_10_6_5_11.txt", "GSM1545536_9_6_5_11.txt",
"GSM1545538_purep53.txt", "GSM1545539_JMS8-2.txt",
"GSM1545540_JMS8-3.txt", "GSM1545541_JMS8-4.txt",
"GSM1545542_JMS8-5.txt", "GSM1545544_JMS9-P7c.txt",
"GSM1545545_JMS9-P8c.txt")
x <- readDGE(files, columns=c(1,3))
# add sample info
samplenames <- substring(colnames(x), 12, nchar(colnames(x)))
colnames(x) <- samplenames
group <- as.factor(c("LP", "ML", "Basal", "Basal", "ML", "LP",
"Basal", "ML", "LP"))
x$samples$group <- group
lane <- as.factor(rep(c("L004","L006","L008"), c(3,4,2)))
x$samples$lane <- lane
# add gene info
geneid <- rownames(x)
genes <- select(Mus.musculus, keys=geneid, columns=c("SYMBOL", "TXCHROM"),
keytype="ENTREZID")
## 'select()' returned 1:many mapping between keys and columns
genes <- genes[!duplicated(genes$ENTREZID),]
x$genes <- genes
# transformations from raw scale
cpm <- cpm(x)
lcpm <- cpm(x, log=TRUE)
# remove lowly expressed genes
keep.exprs <- filterByExpr(x, group=group)
x <- x[keep.exprs,, keep.lib.sizes=FALSE]
# normalising gene expression distributions
x <- calcNormFactors(x, method = "TMM")
# generate design
design <- model.matrix(~0+group+lane)
colnames(design) <- gsub("group", "", colnames(design))
contr.matrix <- makeContrasts(
BasalvsLP = Basal-LP,
BasalvsML = Basal - ML,
LPvsML = LP - ML,
levels = colnames(design))
par(mfrow=c(1,2))
v <- voom(x, design)
vfit <- lmFit(v, design)
vfit <- contrasts.fit(vfit, contrasts=contr.matrix)
efit <- eBayes(vfit)
tfit <- treat(vfit, lfc=1)
dt <- decideTests(tfit)
# test general plot (final column)
glimmaMA(tfit)
plotMD(tfit, status=dt[,3])

# test applying coef
glimmaMA(tfit, status=dt, coef=2)
plotMD(tfit, column=2, status=dt[,2], main=colnames(tfit)[2])

# test status.colours
glimmaMA(tfit, status=dt, status.colours=c("#DD7373","#3B3561","#51A3A3"))
## TEST: DGEExact ##
d <- matrix(rnbinom(16,size=1,mu=10),4,4)
rownames(d) <- c("a","b","c","d")
colnames(d) <- c("A1","A2","B1","B2")
d <- DGEList(counts=d,group=factor(c("A","A","B","B")))
d <- estimateCommonDisp(d)
results <- exactTest(d)
class(results)
## [1] "DGEExact"
## attr(,"package")
## [1] "edgeR"
glimmaMA(results)
plotMD(results)

## TEST: DGELRT ##
x <- estimateDisp(x, design)
xGlm <- glmFit(x, design)
lrt <- glmLRT(xGlm)
lrt
## An object of class "DGELRT"
## $coefficients
## Basal LP ML laneL006 laneL008
## 497097 -12.02957 -17.35909 -17.051247 0.474294727 0.35230334
## 20671 -14.08464 -16.38004 -15.896113 0.892919287 1.76232448
## 27395 -10.73099 -11.07025 -10.765272 -0.102443892 0.03628641
## 18777 -10.20812 -10.40762 -9.941686 0.009848289 0.13404240
## 21399 -10.19313 -10.44680 -10.460501 -0.012359432 0.10733490
## 16619 more rows ...
##
## $fitted.values
## Samples
## Tags 10_6_5_11 9_6_5_11 purep53 JMS8-2 JMS8-3 JMS8-4
## 497097 0.790095 1.347819 355.68529 515.1586 4.620131 2.416674
## 20671 2.196067 4.412337 45.28392 100.7132 23.225142 10.314581
## 27395 457.439229 764.721838 1306.25491 1059.5105 1468.007553 783.563915
## 18777 887.463665 1742.665035 2203.56759 1999.7633 3742.946326 1700.851849
## 21399 853.361528 1037.238862 2236.84819 1985.3792 2178.879386 1599.571632
## Samples
## Tags JMS8-5 JMS9-P7c JMS9-P8c
## 497097 526.2100 1.230457 0.7541756
## 20671 102.8737 16.786348 8.7355745
## 27395 1082.2396 506.716744 316.9227837
## 18777 2042.6630 1273.301475 677.9945432
## 21399 2027.9703 737.899631 634.7604596
## 16619 more rows ...
##
## $deviance
## [1] 2.885355 6.802933 3.951154 3.048092 1.714055
## 16619 more elements ...
##
## $iter
## [1] 9 9 4 5 4
## 16619 more elements ...
##
## $failed
## [1] FALSE FALSE FALSE FALSE FALSE
## 16619 more elements ...
##
## $method
## [1] "levenberg"
##
## $unshrunk.coefficients
## Basal LP ML laneL006 laneL008
## 497097 -12.03199 -17.43168 -17.106427 0.477327156 0.35675279
## 20671 -14.09308 -16.40941 -15.920510 0.906223262 1.78401646
## 27395 -10.73111 -11.07043 -10.765403 -0.102458543 0.03629495
## 18777 -10.20820 -10.40771 -9.941744 0.009849778 0.13405250
## 21399 -10.19321 -10.44689 -10.460597 -0.012359281 0.10734525
## 16619 more rows ...
##
## $df.residual
## [1] 4 4 4 4 4
## 16619 more elements ...
##
## $design
## Basal LP ML laneL006 laneL008
## 1 0 1 0 0 0
## 2 0 0 1 0 0
## 3 1 0 0 0 0
## 4 1 0 0 1 0
## 5 0 0 1 1 0
## 6 0 1 0 1 0
## 7 1 0 0 1 0
## 8 0 0 1 0 1
## 9 0 1 0 0 1
## attr(,"assign")
## [1] 1 1 1 2 2
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
##
## attr(,"contrasts")$lane
## [1] "contr.treatment"
##
##
## $offset
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## [1,] 17.19608 17.40491 17.90603 17.79913 18.15952 17.83674 17.82036 16.95706
## [,9]
## [1,] 16.7928
## attr(,"class")
## [1] "CompressedMatrix"
## attr(,"Dims")
## [1] 5 9
## attr(,"repeat.row")
## [1] TRUE
## attr(,"repeat.col")
## [1] FALSE
## 16619 more rows ...
##
## $dispersion
## [1] 0.04420639 0.27384159 0.03127786 0.02062430 0.01667028
## 16619 more elements ...
##
## $prior.count
## [1] 0.125
##
## $samples
## files group lib.size norm.factors lane
## 10_6_5_11 GSM1545535_10_6_5_11.txt LP 32857304 0.8943956 L004
## 9_6_5_11 GSM1545536_9_6_5_11.txt ML 35328624 1.0250186 L004
## purep53 GSM1545538_purep53.txt Basal 57147943 1.0459005 L004
## JMS8-2 GSM1545539_JMS8-2.txt Basal 51356800 1.0458455 L006
## JMS8-3 GSM1545540_JMS8-3.txt ML 75782871 1.0162707 L006
## JMS8-4 GSM1545541_JMS8-4.txt LP 60506774 0.9217132 L006
## JMS8-5 GSM1545542_JMS8-5.txt Basal 55073018 0.9961959 L006
## JMS9-P7c GSM1545544_JMS9-P7c.txt ML 21305254 1.0861026 L008
## JMS9-P8c GSM1545545_JMS9-P8c.txt LP 19955335 0.9839203 L008
##
## $genes
## ENTREZID SYMBOL TXCHROM
## 1 497097 Xkr4 chr1
## 5 20671 Sox17 chr1
## 6 27395 Mrpl15 chr1
## 7 18777 Lypla1 chr1
## 9 21399 Tcea1 chr1
## 16619 more rows ...
##
## $prior.df
## [1] 4.212451
##
## $AveLogCPM
## [1] 1.6038865 -0.4788595 4.2358837 5.3160202 5.0130895
## 16619 more elements ...
##
## $table
## logFC logCPM LR PValue
## 497097 0.50826629 1.6038865 0.17475701 0.67591825
## 20671 2.54249679 -0.4788595 7.85701514 0.00506239
## 27395 0.05235023 4.2358837 0.04353588 0.83471950
## 18777 0.19338230 5.3160202 0.91075337 0.33991461
## 21399 0.15485153 5.0130895 0.70779003 0.40017845
## 16619 more rows ...
##
## $comparison
## [1] "laneL008"
##
## $df.test
## [1] 1 1 1 1 1
## 16619 more elements ...
glimmaMA(lrt)
### Test: DESeqDataset ###
library("pasilla", quietly=TRUE)
library("DESeq2", quietly=TRUE)
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
##
## simplify
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
library(GlimmaV2)
pasCts <- system.file("extdata",
"pasilla_gene_counts.tsv",
package="pasilla", mustWork=TRUE)
pasAnno <- system.file("extdata",
"pasilla_sample_annotation.csv",
package="pasilla", mustWork=TRUE)
cts <- as.matrix(read.csv(pasCts,sep="\t",row.names="gene_id"))
coldata <- read.csv(pasAnno, row.names=1)
coldata <- coldata[,c("condition","type")]
coldata$condition <- factor(coldata$condition)
coldata$type <- factor(coldata$type)
rownames(coldata) <- sub("fb", "", rownames(coldata))
cts <- cts[, rownames(coldata)]
dds <- DESeqDataSetFromMatrix(countData = cts,
colData = coldata,
design = ~ condition)
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
glimmaMA(dds)